# Setup: chunk defaults, packages, helper functions and the parallel backend.

# Chunk defaults: hide the code, centre figures, keep every figure produced.
knitr::opts_chunk$set(
  echo = FALSE,
  fig.align = "center",
  fig.keep = "all"
)

# Packages are attached in the original order, because the attach order
# decides which package wins when two of them export the same name.
library(tidyr)
library(dplyr)
library(hyperSpec)
library(NMF)
library(ggthemes)
library(xtable)
library(GGally)
library(corrplot)
library(pander)

# Project helper functions, read from an absolute, machine-specific path.
source("D:/Data/R/R Learning/plotSpFun.R", encoding = "UTF-8")

library(rpart)
library(randomForest)
library(caret)
library(rattle)
library(doParallel)

# Start a cluster of 8 workers and register it as the foreach backend.
cl <- makeCluster(8)
registerDoParallel(cl)

# Name of a correlation method; it is not used in the code visible here.
corrMethod <- "spearman"
Šiame pavyzdyje naudojamas atsitiktinių miškų (angl. random forests) klasifikatorius (10 tūkst. sprendimų medžių).
Rezultatų patikrinimas vykdomas kryžminio skaidymo į 5 dalis metodu (angl. 5-fold cross-validation).
# Alternative workspace file, left disabled:
# load("2016-01-08.RData")

# Restore the saved workspace (the "nRuns = 40" file) into the session.
# Presumably this is where `AMP_obj` and `BoosGr` come from - TODO confirm.
load("2016-01-08 (nRuns = 40).RData")
# Cross-validation setup: random seed, number of folds, storage for the
# per-fold accuracies, and the fold indices.

# Set random seed. Don't remove this line.
set.seed(123)

# k - number of folds
k <- 5

# Initialize the accs vector (one accuracy value per fold)
accs <- rep(0, k)

# Earlier preprocessing steps, left disabled:
# AMP_obj <- scoresNMF
# AMP_obj <- AMP_obj[AMP_obj$gr!= "S",,]
# AMP_obj$BoosGr <- as.factor(AMP_obj$BoosGr)
# levels(AMP_obj$Safranin) <- c("0-1","0-1","2-3","2-3")
# BoosGr <- cut(AMP_obj$Boos,breaks = c(8,14,23))

# Attach the grouping variable used as the classification target.
# `BoosGr` is not created in the visible code; presumably it is restored
# from the saved workspace - TODO confirm.
AMP_obj$BoosGr <- BoosGr

# NOTE(review): a dangling, unclosed `pander(` call stood here. It had no
# argument and no closing parenthesis, so the chunk could not be parsed.
# It is disabled until the intended argument is known.
# pander(

# Copy of the data whose `gr` column holds the target classes; it is used
# only to build the folds.
AMP_obj2 <- AMP_obj
AMP_obj2$gr <- factor(AMP_obj2$BoosGr)

# Indices for cross-validation, built from the ID and gr columns.
# `returnTrain = TRUE` is requested, so presumably each list element holds
# the row indices of one training set - confirm against createFolds_strat().
indices <- createFolds_strat(
  AMP_obj2$..[, c("ID", "gr")],
  k = k,
  returnTrain = TRUE
)

# Presumably left over from stepping through the loop body by hand.
i <- 1

# conf = vector("list", k)
conf2 <- vector("list", k)
# k-fold cross-validation of the random-forest classifier.
#
# For every fold: fit the model on the training rows, predict the held-out
# rows, plot variable importance, print the confusion-matrix statistics and
# store the fold accuracy in accs[i].
#
# NOTE(review): the accompanying text mentions 10 thousand trees, while
# ntree = 1000 is used here - confirm which one is intended.
for (i in seq_len(k)) {
  print(i)

  # `indices` is created with `returnTrain = TRUE`, i.e. indices[[i]] is
  # expected to hold the TRAINING rows of fold i (assuming
  # createFolds_strat() follows caret::createFolds()). Train on those rows
  # and test on the remaining ones. The previous code did the opposite and
  # therefore trained on the small part and tested on the large part.
  training <- AMP_obj[indices[[i]], ]
  test <- AMP_obj[-indices[[i]], ]

  # Alternative model (single decision tree), left disabled:
  # tree <- rpart(gr ~ spc, training, method = "class")
  # fancyRpartPlot(tree)

  # Fit caret's parallel random forest on the training rows.
  # importance = TRUE keeps the importance measures needed by varImpPlot().
  my_forest <- train(
    BoosGr ~ spc + Safranin,
    data = training,
    method = "parRF",
    importance = TRUE,
    ntree = 1000
  )

  # Predict the classes of the held-out rows.
  pred <- predict(my_forest, test)

  varImpPlot(my_forest$finalModel)

  # Confusion table: rows = true classes, columns = predicted classes.
  # conf <- table(test$gr, pred)
  conf <- table(test$BoosGr, pred)
  # pander(conf)

  # caret::confusionMatrix() expects the predictions as `data` and the true
  # classes as `reference`. With the arguments the other way round,
  # sensitivity/specificity are mixed up with the predictive values.
  # print(confusionMatrix(test$gr, pred))
  print(confusionMatrix(data = pred, reference = test$BoosGr))

  # Accuracy of this fold: share of cases on the diagonal of the table.
  accs[i] <- sum(diag(conf)) / sum(conf)
}
# Report the accuracies: one row per fold plus a last row, labelled
# "vidutinis" (Lithuanian for "average"), that holds the mean accuracy.
data.frame(
  Pakartojimas = factor(c(1:k, "vidutinis")),
  Tikslumas = c(accs, mean(accs))
) %>%
  pander()
Galutinis atsakymas: tikslumas = `r mean(accs)`
